# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass si-memory-legalizer %s -o - | FileCheck -check-prefix=GCN %s

---

# GCN-LABEL: name: load_singlethread_unordered

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("singlethread-one-as") with unordered ordering. The
# loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The FileCheck
# expectations just above this function require that no S_WAITCNT appears
# between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_singlethread_unordered
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_singlethread_monotonic

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("singlethread-one-as") with monotonic ordering. The
# loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The FileCheck
# expectations just above this function require that no S_WAITCNT appears
# between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_singlethread_monotonic
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_singlethread_acquire

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("singlethread-one-as") with acquire ordering. The
# loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The FileCheck
# expectations just above this function require that no S_WAITCNT appears
# between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_singlethread_acquire
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_singlethread_seq_cst

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("singlethread-one-as") with seq_cst ordering. The
# loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The FileCheck
# expectations just above this function require that no S_WAITCNT appears
# between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_singlethread_seq_cst
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_wavefront_unordered

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("wavefront-one-as") with unordered ordering. The
# loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The FileCheck
# expectations just above this function require that no S_WAITCNT appears
# between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_wavefront_unordered
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_wavefront_monotonic

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("wavefront-one-as") with monotonic ordering. The
# loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The FileCheck
# expectations just above this function require that no S_WAITCNT appears
# between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_wavefront_monotonic
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_wavefront_acquire

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("wavefront-one-as") with acquire ordering. The
# loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The FileCheck
# expectations just above this function require that no S_WAITCNT appears
# between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_wavefront_acquire
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_wavefront_seq_cst

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("wavefront-one-as") with seq_cst ordering. The
# loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The FileCheck
# expectations just above this function require that no S_WAITCNT appears
# between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_wavefront_seq_cst
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_workgroup_unordered

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("workgroup-one-as") with unordered ordering. The
# loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The FileCheck
# expectations just above this function require that no S_WAITCNT appears
# between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_workgroup_unordered
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_workgroup_monotonic

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("workgroup-one-as") with monotonic ordering. The
# loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The FileCheck
# expectations just above this function require that no S_WAITCNT appears
# between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_workgroup_monotonic
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_workgroup_acquire

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("workgroup-one-as") with acquire ordering. The
# loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The FileCheck
# expectations just above this function require that no S_WAITCNT appears
# between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_workgroup_acquire
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_workgroup_seq_cst

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("workgroup-one-as") with seq_cst ordering. The
# loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The FileCheck
# expectations just above this function require that no S_WAITCNT appears
# between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_workgroup_seq_cst
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_agent_unordered

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("agent-one-as") with unordered ordering. The
# loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The FileCheck
# expectations just above this function require that no S_WAITCNT appears
# between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_agent_unordered
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_agent_monotonic

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("agent-one-as") with monotonic ordering. The
# loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The FileCheck
# expectations just above this function require that no S_WAITCNT appears
# between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_agent_monotonic
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_agent_acquire

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("agent-one-as") with acquire ordering. The
# loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The FileCheck
# expectations just above this function require that no S_WAITCNT appears
# between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_agent_acquire
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_agent_seq_cst

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("agent-one-as") with seq_cst ordering. The
# loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The FileCheck
# expectations just above this function require that no S_WAITCNT appears
# between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_agent_seq_cst
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_system_unordered

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("one-as") (no narrower scope prefix) with unordered
# ordering. The loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The
# FileCheck expectations just above this function require that no S_WAITCNT
# appears between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_system_unordered
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_system_monotonic

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("one-as") (no narrower scope prefix) with monotonic
# ordering. The loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The
# FileCheck expectations just above this function require that no S_WAITCNT
# appears between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_system_monotonic
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_system_acquire

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("one-as") (no narrower scope prefix) with acquire
# ordering. The loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The
# FileCheck expectations just above this function require that no S_WAITCNT
# appears between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_system_acquire
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: load_system_seq_cst

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_READ_B32
# GCN-NOT:   S_WAITCNT
# GCN:       FLAT_STORE_DWORD

# Load case: a volatile DS_READ_B32 from an addrspace(3) pointer whose memory
# operand carries syncscope("one-as") (no narrower scope prefix) with seq_cst
# ordering. The loaded $vgpr2 is then written out by FLAT_STORE_DWORD. The
# FileCheck expectations just above this function require that no S_WAITCNT
# appears between the bb.0 label and the DS_READ_B32, nor between it and the
# FLAT_STORE_DWORD.
name:            load_system_seq_cst
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst (s32) from `i32 addrspace(3)* undef`)
    $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
    FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_singlethread_unordered

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# Store case: a volatile DS_WRITE_B32 to an addrspace(3) pointer whose memory
# operand carries syncscope("singlethread-one-as") with unordered ordering. The
# FileCheck expectations just above this function require that no S_WAITCNT
# appears between the bb.0 label and the DS_WRITE_B32, nor between it and
# S_ENDPGM 0.
name:            store_singlethread_unordered
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_singlethread_monotonic

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# Store case: a volatile DS_WRITE_B32 to an addrspace(3) pointer whose memory
# operand carries syncscope("singlethread-one-as") with monotonic ordering. The
# FileCheck expectations just above this function require that no S_WAITCNT
# appears between the bb.0 label and the DS_WRITE_B32, nor between it and
# S_ENDPGM 0.
name:            store_singlethread_monotonic
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_singlethread_release

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# Store case: a volatile DS_WRITE_B32 to an addrspace(3) pointer whose memory
# operand carries syncscope("singlethread-one-as") with release ordering. The
# FileCheck expectations just above this function require that no S_WAITCNT
# appears between the bb.0 label and the DS_WRITE_B32, nor between it and
# S_ENDPGM 0.
name:            store_singlethread_release
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_singlethread_seq_cst

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# Store case: a volatile DS_WRITE_B32 to an addrspace(3) pointer whose memory
# operand carries syncscope("singlethread-one-as") with seq_cst ordering. The
# FileCheck expectations just above this function require that no S_WAITCNT
# appears between the bb.0 label and the DS_WRITE_B32, nor between it and
# S_ENDPGM 0.
name:            store_singlethread_seq_cst
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_wavefront_unordered

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# Store case: a volatile DS_WRITE_B32 to an addrspace(3) pointer whose memory
# operand carries syncscope("wavefront-one-as") with unordered ordering. The
# FileCheck expectations just above this function require that no S_WAITCNT
# appears between the bb.0 label and the DS_WRITE_B32, nor between it and
# S_ENDPGM 0.
name:            store_wavefront_unordered
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") unordered (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_wavefront_monotonic

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# Store case: a volatile DS_WRITE_B32 to an addrspace(3) pointer whose memory
# operand carries syncscope("wavefront-one-as") with monotonic ordering. The
# FileCheck expectations just above this function require that no S_WAITCNT
# appears between the bb.0 label and the DS_WRITE_B32, nor between it and
# S_ENDPGM 0.
name:            store_wavefront_monotonic
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") monotonic (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_wavefront_release

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# Store case: a volatile DS_WRITE_B32 to an addrspace(3) pointer whose memory
# operand carries syncscope("wavefront-one-as") with release ordering. The
# FileCheck expectations just above this function require that no S_WAITCNT
# appears between the bb.0 label and the DS_WRITE_B32, nor between it and
# S_ENDPGM 0.
name:            store_wavefront_release
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") release (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_wavefront_seq_cst

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# Store case: a volatile DS_WRITE_B32 to an addrspace(3) pointer whose memory
# operand carries syncscope("wavefront-one-as") with seq_cst ordering. The
# FileCheck expectations just above this function require that no S_WAITCNT
# appears between the bb.0 label and the DS_WRITE_B32, nor between it and
# S_ENDPGM 0.
name:            store_wavefront_seq_cst
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") seq_cst (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_workgroup_unordered

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# Store case: a volatile DS_WRITE_B32 to an addrspace(3) pointer whose memory
# operand carries syncscope("workgroup-one-as") with unordered ordering. The
# FileCheck expectations just above this function require that no S_WAITCNT
# appears between the bb.0 label and the DS_WRITE_B32, nor between it and
# S_ENDPGM 0.
name:            store_workgroup_unordered
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") unordered (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_workgroup_monotonic

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# MIR input for the workgroup / monotonic store case: a single DS_WRITE_B32
# whose memory operand is a volatile (s32) store to an addrspace(3) pointer,
# tagged syncscope("workgroup-one-as") with ordering monotonic.
# The two S_LOAD_DWORD_IMM results are moved into $vgpr0/$vgpr1 to feed the
# store, and $m0 is set to -1 beforehand. The check lines preceding this
# function require that no S_WAITCNT shows up before the DS_WRITE_B32 or
# between it and S_ENDPGM 0.
name:            store_workgroup_monotonic
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") monotonic (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_workgroup_release

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# MIR input for the workgroup / release store case: a single DS_WRITE_B32
# whose memory operand is a volatile (s32) store to an addrspace(3) pointer,
# tagged syncscope("workgroup-one-as") with ordering release.
# The two S_LOAD_DWORD_IMM results are moved into $vgpr0/$vgpr1 to feed the
# store, and $m0 is set to -1 beforehand. The check lines preceding this
# function require that no S_WAITCNT shows up before the DS_WRITE_B32 or
# between it and S_ENDPGM 0.
name:            store_workgroup_release
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") release (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_workgroup_seq_cst

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# MIR input for the workgroup / seq_cst store case: a single DS_WRITE_B32
# whose memory operand is a volatile (s32) store to an addrspace(3) pointer,
# tagged syncscope("workgroup-one-as") with ordering seq_cst.
# The two S_LOAD_DWORD_IMM results are moved into $vgpr0/$vgpr1 to feed the
# store, and $m0 is set to -1 beforehand. The check lines preceding this
# function require that no S_WAITCNT shows up before the DS_WRITE_B32 or
# between it and S_ENDPGM 0.
name:            store_workgroup_seq_cst
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") seq_cst (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_agent_unordered

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# MIR input for the agent / unordered store case: a single DS_WRITE_B32 whose
# memory operand is a volatile (s32) store to an addrspace(3) pointer, tagged
# syncscope("agent-one-as") with ordering unordered.
# The two S_LOAD_DWORD_IMM results are moved into $vgpr0/$vgpr1 to feed the
# store, and $m0 is set to -1 beforehand. The check lines preceding this
# function require that no S_WAITCNT shows up before the DS_WRITE_B32 or
# between it and S_ENDPGM 0.
name:            store_agent_unordered
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") unordered (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_agent_monotonic

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# MIR input for the agent / monotonic store case: a single DS_WRITE_B32 whose
# memory operand is a volatile (s32) store to an addrspace(3) pointer, tagged
# syncscope("agent-one-as") with ordering monotonic.
# The two S_LOAD_DWORD_IMM results are moved into $vgpr0/$vgpr1 to feed the
# store, and $m0 is set to -1 beforehand. The check lines preceding this
# function require that no S_WAITCNT shows up before the DS_WRITE_B32 or
# between it and S_ENDPGM 0.
name:            store_agent_monotonic
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") monotonic (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_agent_release

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# MIR input for the agent / release store case: a single DS_WRITE_B32 whose
# memory operand is a volatile (s32) store to an addrspace(3) pointer, tagged
# syncscope("agent-one-as") with ordering release.
# The two S_LOAD_DWORD_IMM results are moved into $vgpr0/$vgpr1 to feed the
# store, and $m0 is set to -1 beforehand. The check lines preceding this
# function require that no S_WAITCNT shows up before the DS_WRITE_B32 or
# between it and S_ENDPGM 0.
name:            store_agent_release
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") release (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_agent_seq_cst

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# MIR input for the agent / seq_cst store case: a single DS_WRITE_B32 whose
# memory operand is a volatile (s32) store to an addrspace(3) pointer, tagged
# syncscope("agent-one-as") with ordering seq_cst.
# The two S_LOAD_DWORD_IMM results are moved into $vgpr0/$vgpr1 to feed the
# store, and $m0 is set to -1 beforehand. The check lines preceding this
# function require that no S_WAITCNT shows up before the DS_WRITE_B32 or
# between it and S_ENDPGM 0.
name:            store_agent_seq_cst
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") seq_cst (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_system_unordered

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# MIR input for the unordered store case that the test name labels "system":
# a single DS_WRITE_B32 whose memory operand is a volatile (s32) store to an
# addrspace(3) pointer, tagged syncscope("one-as") (no narrower scope prefix in
# the string) with ordering unordered.
# The two S_LOAD_DWORD_IMM results are moved into $vgpr0/$vgpr1 to feed the
# store, and $m0 is set to -1 beforehand. The check lines preceding this
# function require that no S_WAITCNT shows up before the DS_WRITE_B32 or
# between it and S_ENDPGM 0.
name:            store_system_unordered
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") unordered (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_system_monotonic

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# MIR input for the monotonic store case that the test name labels "system":
# a single DS_WRITE_B32 whose memory operand is a volatile (s32) store to an
# addrspace(3) pointer, tagged syncscope("one-as") (no narrower scope prefix in
# the string) with ordering monotonic.
# The two S_LOAD_DWORD_IMM results are moved into $vgpr0/$vgpr1 to feed the
# store, and $m0 is set to -1 beforehand. The check lines preceding this
# function require that no S_WAITCNT shows up before the DS_WRITE_B32 or
# between it and S_ENDPGM 0.
name:            store_system_monotonic
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") monotonic (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_system_release

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# MIR input for the release store case that the test name labels "system":
# a single DS_WRITE_B32 whose memory operand is a volatile (s32) store to an
# addrspace(3) pointer, tagged syncscope("one-as") (no narrower scope prefix in
# the string) with ordering release.
# The two S_LOAD_DWORD_IMM results are moved into $vgpr0/$vgpr1 to feed the
# store, and $m0 is set to -1 beforehand. The check lines preceding this
# function require that no S_WAITCNT shows up before the DS_WRITE_B32 or
# between it and S_ENDPGM 0.
name:            store_system_release
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") release (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: store_system_seq_cst

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRITE_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# MIR input for the seq_cst store case that the test name labels "system":
# a single DS_WRITE_B32 whose memory operand is a volatile (s32) store to an
# addrspace(3) pointer, tagged syncscope("one-as") (no narrower scope prefix in
# the string) with ordering seq_cst.
# The two S_LOAD_DWORD_IMM results are moved into $vgpr0/$vgpr1 to feed the
# store, and $m0 is set to -1 beforehand. The check lines preceding this
# function require that no S_WAITCNT shows up before the DS_WRITE_B32 or
# between it and S_ENDPGM 0.
name:            store_system_seq_cst
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") seq_cst (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: atomicrmw_singlethread_unordered

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRXCHG_RTN_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# MIR input for the singlethread / unordered read-modify-write case: a single
# DS_WRXCHG_RTN_B32 that defines $vgpr2 and carries a volatile (s32) memory
# operand on an addrspace(3) pointer, tagged syncscope("singlethread-one-as")
# with ordering unordered. Note the memory operand is spelled as a `store`.
# The two S_LOAD_DWORD_IMM results are moved into $vgpr0/$vgpr1 to feed the
# exchange, and $m0 is set to -1 beforehand. The check lines preceding this
# function require that no S_WAITCNT shows up before the DS_WRXCHG_RTN_B32 or
# between it and S_ENDPGM 0.
name:            atomicrmw_singlethread_unordered
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: atomicrmw_singlethread_monotonic

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRXCHG_RTN_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# MIR input for the singlethread / monotonic read-modify-write case: a single
# DS_WRXCHG_RTN_B32 that defines $vgpr2 and carries a volatile (s32) memory
# operand on an addrspace(3) pointer, tagged syncscope("singlethread-one-as")
# with ordering monotonic. Note the memory operand is spelled as a `store`.
# The two S_LOAD_DWORD_IMM results are moved into $vgpr0/$vgpr1 to feed the
# exchange, and $m0 is set to -1 beforehand. The check lines preceding this
# function require that no S_WAITCNT shows up before the DS_WRXCHG_RTN_B32 or
# between it and S_ENDPGM 0.
name:            atomicrmw_singlethread_monotonic
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: atomicrmw_singlethread_acquire

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRXCHG_RTN_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# MIR input for the singlethread / acquire read-modify-write case: a single
# DS_WRXCHG_RTN_B32 that defines $vgpr2 and carries a volatile (s32) memory
# operand on an addrspace(3) pointer, tagged syncscope("singlethread-one-as")
# with ordering acquire. Note the memory operand is spelled as a `store`.
# The two S_LOAD_DWORD_IMM results are moved into $vgpr0/$vgpr1 to feed the
# exchange, and $m0 is set to -1 beforehand. The check lines preceding this
# function require that no S_WAITCNT shows up before the DS_WRXCHG_RTN_B32 or
# between it and S_ENDPGM 0.
name:            atomicrmw_singlethread_acquire
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acquire (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: atomicrmw_singlethread_release

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRXCHG_RTN_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# MIR input for the singlethread / release read-modify-write case: a single
# DS_WRXCHG_RTN_B32 that defines $vgpr2 and carries a volatile (s32) memory
# operand on an addrspace(3) pointer, tagged syncscope("singlethread-one-as")
# with ordering release. Note the memory operand is spelled as a `store`.
# The two S_LOAD_DWORD_IMM results are moved into $vgpr0/$vgpr1 to feed the
# exchange, and $m0 is set to -1 beforehand. The check lines preceding this
# function require that no S_WAITCNT shows up before the DS_WRXCHG_RTN_B32 or
# between it and S_ENDPGM 0.
name:            atomicrmw_singlethread_release
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: atomicrmw_singlethread_acq_rel

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRXCHG_RTN_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# MIR input for the singlethread / acq_rel read-modify-write case: a single
# DS_WRXCHG_RTN_B32 that defines $vgpr2 and carries a volatile (s32) memory
# operand on an addrspace(3) pointer, tagged syncscope("singlethread-one-as")
# with ordering acq_rel. Note the memory operand is spelled as a `store`.
# The two S_LOAD_DWORD_IMM results are moved into $vgpr0/$vgpr1 to feed the
# exchange, and $m0 is set to -1 beforehand. The check lines preceding this
# function require that no S_WAITCNT shows up before the DS_WRXCHG_RTN_B32 or
# between it and S_ENDPGM 0.
name:            atomicrmw_singlethread_acq_rel
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acq_rel (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
---

# GCN-LABEL: name: atomicrmw_singlethread_seq_cst

# GCN-LABEL: bb.0:
# GCN-NOT:   S_WAITCNT
# GCN:       DS_WRXCHG_RTN_B32
# GCN-NOT:   S_WAITCNT
# GCN:       S_ENDPGM 0

# MIR input for the singlethread / seq_cst read-modify-write case: a single
# DS_WRXCHG_RTN_B32 that defines $vgpr2 and carries a volatile (s32) memory
# operand on an addrspace(3) pointer, tagged syncscope("singlethread-one-as")
# with ordering seq_cst. Note the memory operand is spelled as a `store`.
# The two S_LOAD_DWORD_IMM results are moved into $vgpr0/$vgpr1 to feed the
# exchange, and $m0 is set to -1 beforehand. The check lines preceding this
# function require that no S_WAITCNT shows up before the DS_WRXCHG_RTN_B32 or
# between it and S_ENDPGM 0.
name:            atomicrmw_singlethread_seq_cst
body:             |
  bb.0:
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4)
    $m0 = S_MOV_B32 -1
    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
    $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst (s32) into `i32 addrspace(3)* undef`)
    S_ENDPGM 0

...
